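/*
Experiment: speed-up.
Times the clustering routine (kmeansm) on the full dataset and on two coresets
(Our_Coreset and HLLW25_Coreset), and reports the running times together with
the cost of each resulting solution evaluated on the full dataset.
*/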

#include "head.h"
#include "tools.h"
#include "function.h"
#include "kmeans.h"
#include "coreset.h"

// Arithmetic mean of the entries of `a`.
// The entries are added front to back and the total is divided by the element
// count; for an empty vector this is a floating-point 0/0.
double Mean(vector<double> a) {
	double total = 0;
	for (vector<double>::const_iterator it = a.begin(); it != a.end(); ++it) {
		total += *it;
	}
	const int count = (int)a.size();
	return total / count;
}

// Largest entry of `a`.
// An empty vector yields 0, the same value the earlier version returned for it.
// The running maximum starts from the first entry instead of 0, so a vector
// containing only negative values reports its true maximum rather than 0.
double Max(vector<double> a) {
	if (a.empty()) return 0;
	double ret = a[0];
	for (double x : a) ret = max(ret, x);
	return ret;
}

// Smallest entry of `a`.
// An empty vector yields 1, the same value the earlier version returned for it.
// The running minimum starts from the first entry instead of 1, so a vector
// whose values are all greater than 1 reports its true minimum rather than 1.
double Min(vector<double> a) {
	if (a.empty()) return 1;
	double ret = a[0];
	for (double x : a) ret = min(ret, x);
	return ret;
}

// Population variance of the entries of `a`: the sum of squared deviations
// from Mean(a), divided by the element count (not count - 1).
double Var(vector<double> a) {
	const double center = Mean(a);
	double sq_sum = 0;
	for (auto it = a.begin(); it != a.end(); ++it) {
		sq_sum += (*it - center) * (*it - center);
	}
	return sq_sum / (int)a.size();
}



// Entry point of the speed-up experiment.
//
// Command line: data_name data_size data_dim num_of_centers num_of_outliers threshold
//
// Reads data_size points of data_dim coordinates from the file data_name, then
// repeats T rounds. Each round times kmeansm on the full dataset, and for each
// of the two coreset constructions (Our_Coreset, HLLW25_Coreset) times the
// construction and the kmeansm run on the coreset separately. Every solution's
// cost is evaluated with rcost on the full dataset. Per-round figures and the
// averages over all rounds are written to stderr.
//
// Returns 0 on success and 1 when the arguments are missing, the data file
// cannot be opened, or the file does not contain enough numeric values.
int main(int argc, char** argv) {

	if (argc < 7) {
		printf("6 arguments are required (data_name, data_size, data_dim, num_of_centers, num_of_outliers,threshold).");
		return 1; // a usage error must not look like a successful run
	}
	FILE* FIN = fopen(argv[1], "r");
	if (FIN == nullptr) {
		// Without this check fscanf below would be handed a null stream.
		cerr << "Cannot open data file : " << argv[1] << endl;
		return 1;
	}
	//N : number of datapoints;
	//D : dimension;
	//k : number of clusters;
	//m : number of outliers;
	//thr : determine how the rings and groups are formed (see Implementation Details in Section 5 of our paper);

	int N = atoi(argv[2]), D = atoi(argv[3]), k = atoi(argv[4]), m = atoi(argv[5]);
	double thr = atof(argv[6]);
	int z = 1; // forwarded unchanged to rcost and to both coreset constructions
	dataset X;
	cerr << "-----------Reading Data-----------" << endl;
	for (int i = 1; i <= N; i++) {
		// Slot 0 is set to 1 and slots 1..D hold the coordinates
		// (slot 0 is presumably the point's weight -- confirm against the coreset code).
		datapoint x; x.resize(D + 1);
		x[0] = 1;
		for (int j = 1; j <= D; j++) {
			if (fscanf(FIN, "%lf", &x[j]) != 1) {
				// A short or malformed file would otherwise leave zero-filled
				// coordinates in the dataset without any warning.
				cerr << "Failed to read coordinate " << j << " of point " << i << " from " << argv[1] << endl;
				fclose(FIN);
				return 1;
			}
		}
		X.push_back(x);
	}
	fclose(FIN); // the file is not needed once the points are in memory
	cerr << "-----------Reading Over-----------" << endl;


	const int T = 10; // number of repetitions the reported figures are averaged over
	// Running sums of the per-round timings; divided by T when reported.
	double ave_our_construct_ti = 0, ave_our_lloyd_ti = 0;
	double ave_s_construct_ti = 0, ave_s_lloyd_ti = 0;
	double ave_ti = 0;
	// Per-round costs: ores = our coreset, res = full dataset, sres = HLLW25 coreset.
	vector<double> ores, res, sres;
	dataset OurCoreset, HLLW25Coreset;
	int mm = m; //mm : size of coreset minus number of outliers, that is, we construct a coreset with size m + mm;
	for (int t = 1; t <= T; t++) {
		// Baseline: clustering directly on the full dataset.
		double ti = clock();
		dataset C = kmeansm(X, k, m, 100, -1);
		double Xti = (clock() - ti) / (double)CLOCKS_PER_SEC;
		double xcost = rcost(X, C, m, z);
		ave_ti += Xti;
		res.push_back(xcost);

		// Our coreset: the Kmeans(X, k) call is inside the timed region,
		// so it counts towards the construction time.
		ti = clock();
		OurCoreset = Our_Coreset(X, Kmeans(X, k), m, mm, z, thr);
		double our_construct_ti = (clock() - ti) / (double)CLOCKS_PER_SEC;

		ti = clock();
		C = kmeansm(OurCoreset, k, m, 100, -1);
		double our_lloyd_ti = (clock() - ti) / (double)CLOCKS_PER_SEC;

		// The centers found on the coreset are scored on the full dataset.
		double ocost = rcost(X, C, m, z);

		// HLLW25 coreset, measured the same way.
		ti = clock();
		HLLW25Coreset = HLLW25_Coreset(X, Kmeans(X, k), m, mm, z, thr);
		double s_construct_ti = (clock() - ti) / (double)CLOCKS_PER_SEC;

		ti = clock();
		C = kmeansm(HLLW25Coreset, k, m, 100, -1);
		double s_lloyd_ti = (clock() - ti) / (double)CLOCKS_PER_SEC;

		double scost = rcost(X, C, m, z);

		ave_our_construct_ti += our_construct_ti;
		ave_our_lloyd_ti += our_lloyd_ti;
		ave_s_construct_ti += s_construct_ti;
		ave_s_lloyd_ti += s_lloyd_ti;

		ores.push_back(ocost);
		sres.push_back(scost);
		cerr << "Round " << t << ". running on original dataset : " << Xti << "; cost : " << xcost << endl;
		cerr << "Round " << t << ". ours, construction time : " << our_construct_ti << "; Lloyd time : " << our_lloyd_ti << "; cost : " << ocost << endl;
		cerr << "Round " << t << ". HLLW25, construction time : " << s_construct_ti << "; Lloyd time : " << s_lloyd_ti << "; cost : " << scost << endl;


	}
	cerr << "Testing end." << endl;
	cerr << "Lloyd on original dataset , time : " << ave_ti / T << "; cost : " << Mean(res) << endl;
	cerr << "ours, total time : " << ave_our_lloyd_ti / T + ave_our_construct_ti / T << "; construction time : " << ave_our_construct_ti / T << "; Lloyd time : " << ave_our_lloyd_ti / T << "; cost : " << Mean(ores) << endl;
	cerr << "HLLW25, total time : " << ave_s_lloyd_ti / T + ave_s_construct_ti / T << "; construction time : " << ave_s_construct_ti / T << "; Lloyd time : " << ave_s_lloyd_ti / T << ";; cost : " << Mean(sres) << endl;

	return 0;
}